/*    Copyright 2012 Tobias Marschall
 *
 *    This file is part of MoSDi.
 *
 *    MoSDi is free software: you can redistribute it and/or modify
 *    it under the terms of the GNU General Public License as published by
 *    the Free Software Foundation, either version 3 of the License, or
 *    (at your option) any later version.
 *
 *    MoSDi is distributed in the hope that it will be useful,
 *    but WITHOUT ANY WARRANTY; without even the implied warranty of
 *    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *    GNU General Public License for more details.
 *
 *    You should have received a copy of the GNU General Public License
 *    along with MoSDi.  If not, see <http://www.gnu.org/licenses/>.
 */

package mosdi.subcommands;

import java.util.ArrayList;
import java.util.List;

import mosdi.fa.CDFA;
import mosdi.fa.DFAFactory;
import mosdi.fa.FiniteMemoryTextModel;
import mosdi.fa.GeneralizedString;
import mosdi.fa.GeneralizedStringsNFA;
import mosdi.fa.IIDTextModel;
import mosdi.fa.NFA;
import mosdi.fa.NodeLimitExceededException;
import mosdi.paa.PAA;
import mosdi.paa.TextBasedPAA;
import mosdi.paa.apps.MatchCountDAA;
import mosdi.util.Alphabet;
import mosdi.util.Log;
import mosdi.util.iterators.AbelianPatternInstanceIterator;
import mosdi.util.iterators.LexicographicalIterator;

public class AllSeedsSubcommand extends Subcommand {

	@Override
	public String usage() {
		return
		super.usage()+" [options] <length> <wildcards>\n" +
		"\n" +
		"  Iterates over all seeds of given length with the given number of wildcards\n" + 
		"  and computes the distribution of the number of random hits for each seed.\n" +
		"\n" +
		"  Assumes an ungapped i.i.d. homology model, i.e. an i.i.d. text model\n" +
		"  over {0,1}, where 0 stands for a mismatch and 1 stands for a match.\n" +
		"\n" +
		"Options:\n" +
		"  -p <prob>: homology level (probability of a match, default:0.95)\n" +
		"  -l <length>: length of random sequence to consider (default: 64)\n" +
		"  -k <maxhits>: compute distribution up to <maxhits> (default: 1)";
	}

	@Override
	public String description() {
		return "Calculates statistics for all alignment seeds within a given class.";
	}

	@Override
	public String name() {
		return "all-seeds";
	}

	@Override
	public int run(String[] args) {
		parseOptions(args, 2, "p:l:k:");

		// Option dependencies
		// NONE

		// Mandatory arguments
		int seedLength = getIntArgument(0);
		int seedWildcards = getIntArgument(1);

		// Options
		double matchProbability = this.getRangedDoubleOption("p", 0.0, 1.0, 0.95);
		int length = getPositiveIntOption("l", 64);
		int maxHits = getPositiveIntOption("k", 1);

		Alphabet alphabet = new Alphabet("01");
		double[] characterDistribution = {1.0-matchProbability, matchProbability};
		FiniteMemoryTextModel textModel = new IIDTextModel(alphabet.size(), characterDistribution); 

		Alphabet seedAlphabet = new Alphabet("1*");
		int[] frequencies = {seedWildcards, seedLength-seedWildcards};
		LexicographicalIterator iterator = new AbelianPatternInstanceIterator(frequencies);
		Log.println(Log.Level.STANDARD, "Format: >>seed>> seed >>runtime>> runtime >>hit-count-dist>> hit-count-distribution ");
		while (iterator.hasNext()) {
			int[] seed = iterator.next();
			if (seed[0] == 0) {
				iterator.skip(0);
				continue;
			}
			if (seed[seed.length-1] == 0) {
				continue;
			}
			String seedStr = seedAlphabet.buildString(seed);
			GeneralizedString seedGenString = new GeneralizedString(alphabet, seedStr, '*');
			// construct automaton
			CDFA cdfa = null;
			List<GeneralizedString> genStringList = new ArrayList<GeneralizedString>();
			genStringList.add(seedGenString);
			int states = -1;
			int statesMinimal = -1;
			Log.startTimer();
			try {
				NFA nfa;
				nfa = new GeneralizedStringsNFA(genStringList); 
				cdfa = DFAFactory.build(alphabet, nfa, 50000);
				states = cdfa.getStateCount();
				Log.printf(Log.Level.VERBOSE, "DFA states: \"%d\"%n", states);
				cdfa = cdfa.minimizeHopcroft();
				statesMinimal = cdfa.getStateCount();
				Log.printf(Log.Level.VERBOSE, "DFA states (after minimization): \"%d\"%n", statesMinimal);
			} catch (NodeLimitExceededException e) {
				Log.println(Log.Level.STANDARD, "Skipping: node limit exceeded (switch -N)");
				return 1;
			}
			if (Log.levelAtLeast(Log.Level.DEBUG)) {
				Log.print(Log.Level.DEBUG, cdfa.toString());
			}
			// resulting distribution of matches we are interested in
			double[] matchDistribution = null;
			MatchCountDAA daa = new MatchCountDAA(cdfa, maxHits);
			PAA paa = new TextBasedPAA(daa, textModel);
			double[][] stateValueDistribution = paa.computeStateValueDistribution(length);
			matchDistribution = paa.toValueDistribution(stateValueDistribution);
			Log.stopTimer("Computing PAA hit count distribution");
			double time = Log.getLastPeriodCpu();
			StringBuilder sb = new StringBuilder();
			sb.append(String.format(">>seed>> %s ", seedStr));
			sb.append(Log.format(">>runtime>> %t ", time));
			sb.append(">>dist>> ");
			for (int i=0; i<=maxHits; ++i) {
				sb.append(String.format("%e ", matchDistribution[i]));
			}
			Log.println(Log.Level.STANDARD, sb.toString());
		}
		return 0;
	}
}


